######
library(numDeriv)
library(ks)
library(LaplacesDemon)
library(lqmm)
library(Matrix)
library(MASS)
library(MBESS)
library(quantreg)
library(gtools)
library(pbapply)

library(xtable)
library(ggpubr)

###
### Function to simulate the data
# Normal, Student-t, Cauchy
genData = function(N,BetaMat,Sigma,dist='NORM',Seed=NULL){
  # Simulate clustered multivariate outcomes with a linear location model.
  # N: number of clusters
  # BetaMat: matrix (one row per outcome) or vector of location parameters
  # Sigma: variance-covariance (scale) matrix of the within-cluster errors
  # dist: error distribution ('NORM': normal, 'CAUCHY': Cauchy,
  #       'MT': Student-t with 3 degrees of freedom and scale Sigma/3)
  # Seed: optional seed, set before any random draw
  # Returns a data frame with columns ID, Y, X0, X1, ...; for an unknown
  # 'dist' the string 'distribution not available' is returned instead.
  if(!is.null(Seed)){
    set.seed(Seed)
  }
  q = if(is.matrix(BetaMat)) nrow(BetaMat) else 1
  t = ncol(Sigma)/q
  zero.loc = rep(0,t*q)
  
  # errors are drawn first (one row per cluster) so that the random-number
  # stream is consumed in the same order as before
  if(dist=='NORM'){
    er = mvrnorm(N,zero.loc,Sigma)
  }else if(dist=='CAUCHY'){
    er = rmvc(N,zero.loc,Sigma)
  }else if(dist=='MT'){
    er = rmvt(N, zero.loc, Sigma*(1/3), df=3)
  }else{
    return(c('distribution not available'))
  }
  
  Beta = vec(BetaMat,byrow=TRUE)
  # binary cluster-level covariate: the last n.ones clusters get g = 1
  n.ones = rbinom(1,N,0.5)
  g = rep(c(0,1),times=c(N-n.ones,n.ones))
  x = seq(0,1,length.out = t)
  
  # per-cluster design: (1, x, x*g) repeated block-diagonally for each outcome
  design.one = function(i){
    kronecker(diag(q),cbind(1,x,x*g[i]))
  }
  Xq = do.call(rbind,lapply(1:N,design.one))
  
  Y = Xq%*%Beta + vec(er,byrow=TRUE)
  
  Data = cbind(rep(1:N,each=q*t),Y,Xq)
  colnames(Data) = c('ID','Y',paste('X',0:(ncol(Xq)-1),sep = ''))
  as.data.frame(Data)
}

### Function to compute the maximum likelihood estimator
mle.MAL = function(y,X,ID,tau,parm.ini=NULL,epsi=1e-2){
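  # y: vector with the outcomes, stacked cluster by cluster
  # X: design matrix
  # ID: identification for the clusters (individuals)
  # tau: scalar between 0 and 1 (presumably the quantile level), shared by all components
  # parm.ini: initial values for the parameters (NULL: computed from the data)
  # epsi: epsilon parameter (epsi=0 two-stage estimator, epsi>0 direct maximization)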
  
  ### functions needed
  # Functions for the reparameterization of the correlation matrix
  Theta2Corr = function(d,Thetaval){
    # Map a vector of angles to a d x d correlation matrix.
    # d: dimension of the correlation matrix
    # Thetaval: angles filling the strict lower triangle (column-major order)
    # Row j of the factor L holds cos(theta[j,k]) times the running product of
    # sin(theta[j,1]), ..., sin(theta[j,k-1]); the result is L %*% t(L).
    Angles = matrix(0,d,d)
    Angles[lower.tri(Angles)] = Thetaval
    L = matrix(0,d,d)
    L[1,1] = 1
    for(row in 2:d){
      sin.run = cumprod(c(1,sin(Angles[row,1:(row-1)])))
      L[row,1:row] = sin.run*cos(Angles[row,1:row])
    }
    tcrossprod(L)
  }
  Corr2Theta = function(R){
    # Recover the angles of a correlation matrix from its Cholesky factor.
    # R: correlation matrix (must be accepted by chol())
    # Returns the strict lower triangle of the angle matrix in column-major
    # order, i.e. the layout Theta2Corr expects for Thetaval.
    L = t(chol(R))
    d = ncol(R)
    Ang = matrix(0,d,d)
    
    # first column: L[i,1] = cos(angle[i,1])
    Ang[,1] = acos(L[,1])
    
    if(d==2){
      Ang[2,1] = acos(L[2,1])
    }else{
      for(i in 3:d){
        for(j in 2:(i-1)){
          # divide out the sines of the angles already found in this row
          sin.prod = prod(sin(Ang[i,1:(j-1)]))
          Ang[i,j] = acos(L[i,j]*(1/sin.prod))
        }
      }
    }
    Ang[lower.tri(Ang)]
  }
  TargetFun = function(Thetaval,R,W){
    # Weighted squared distance between the correlation matrix implied by
    # Thetaval and the target matrix R, taken over the half-vectorised
    # (vech) entries.
    # Thetaval: angles passed to Theta2Corr
    # R: target matrix
    # W: matrix of weights, same dimension as R
    R.cand = Theta2Corr(dim(R)[1],Thetaval)
    gap = ks::vech(R.cand) - ks::vech(R)
    sum(ks::vech(W)*gap^2)
  }
  GenCor.Scaling <- function(R){
    # Return R unchanged when it is positive definite; otherwise return the
    # matrix Theta2Corr builds from the angles that minimise TargetFun with
    # unit weights (optim default method, all angles started at 0).
    p = ncol(R)
    if(is.positive.definite(R)){
      return(R)
    }
    Sol = optim(par=rep(0,p*(p-1)/2),fn=TargetFun,R=R,W=matrix(1,p,p))
    Theta2Corr(p,Sol$par)
  }
  
  # Functions to compute the log-likelihood
  loglike.MAL = function(parm,Y,X,tau,epsi=0){
    # Log-likelihood for clusters of size d = 2 (the correlation matrix is
    # built from the single correlation rho).
    # parm: c(Beta[1:p], ds[1:d], rho) with p = ncol(X)
    # Y: d x N matrix of outcomes (one column per cluster) or a length-d
    #    vector for a single cluster
    # X: stacked design matrix; X %*% Beta is reshaped column-wise to d x N
    # tau: scalar recycled over the d components
    # epsi: constant added to every quadratic form m_i; a non-zero value also
    #       changes the normalising constant
    # Returns the log-likelihood summed over clusters, or NA when a scale is
    # negative or rho lies outside [-1,1].
    p = ncol(X)
    if(is.matrix(Y)){
      d = nrow(Y)    
      N = ncol(Y)
    }else{
      d = length(Y)
      N = 1
    }
    
    v = (2 - d)/2  # order of the Bessel function
    tau.vec = rep(tau,d)
    e = (1-2*tau.vec)/(tau.vec*(1-tau.vec)) # constant
    delta = sqrt(2/(tau.vec*(1-tau.vec))) # constant
    Beta = parm[1:p]
    ds = parm[(p+1):(d+p)]
    
    if(any(ds < 0)){return(NA)}
    rho = parm[p+d+1]
    if(rho < -1 || rho > 1){return(NA)}
    Psi = invvech(c(1,rho,1))
    
    mu = e*ds
    Mu = matrix(mu,d,N)
    Loc = matrix(c(X%*%Beta),d,N)
    # Sigma = diag(ds*delta) %*% Psi %*% diag(ds*delta), built element-wise
    Sigma.tilde = t(Psi*delta)*delta
    Sigma = as.symmetric.matrix(t(Sigma.tilde*ds)*ds)
    if(!is.positive.definite(Sigma)){
      Sigma = as.symmetric.matrix(make.positive.definite(Sigma))
    }
    # only the inverse of Sigma is needed; the previously computed inverses
    # of Psi and Sigma.tilde were never used (and solve(Psi) failed at
    # rho = -1 or 1)
    Omega = as.inverse(Sigma)
    
    Yc = Y - Loc
    
    # per-cluster quadratic form Yc_i' Omega Yc_i
    m_i = colSums(Yc * crossprod(Omega,Yc))
    
    dd = 2+sum(mu * crossprod(Omega,mu))
    if(epsi==0){
      Const = log(2) - log(2 * pi) * (d/2)
    }else{
      Const = 0.5*log(2) - log(sqrt(epsi)) - log(besselK(sqrt(2*epsi),1)) - log(2 * pi) * (d/2)
    }
    T1 = -N*logdet(Sigma) * 0.5
    T2 = sum(Yc * crossprod(Omega,Mu))
    # with v = 0 the term is skipped so that log(0) cannot produce 0 * -Inf
    if(v==0){
      T3 = 0
    }else{
      T3 = (v/2)*sum(log(m_i + epsi))    
    }
    T4 = -N*v*0.5*log(dd)
    T6 = sqrt(dd*(m_i+epsi))
    T5 = sum(log(besselK(T6,v)))
    N*Const+T1 + T2 + T3 + T4 + T5
  }
  loglike.MAL.2 = function(parm,Y,X,tau,epsi=0){
    # Log-likelihood for a general cluster size d; the correlation matrix is
    # parameterised by angles and rebuilt with Theta2Corr.
    # parm: c(Beta[1:p], ds[1:d], thetas) with p = ncol(X)
    # Y: d x N matrix of outcomes (one column per cluster) or a length-d vector
    # X: stacked design matrix; X %*% Beta is reshaped column-wise to d x N
    # tau: scalar recycled over the d components
    # epsi: constant added to every quadratic form; a non-zero value also
    #       changes the normalising constant
    # Returns the log-likelihood summed over clusters, or NA when a scale is
    # negative or an angle lies outside [0,pi].
    p = ncol(X)
    
    if(is.matrix(Y)){
      d = nrow(Y)    
      N = ncol(Y)
    }else{
      d = length(Y)
      N = 1
    }
    
    v = (2 - d)/2  # order of the Bessel function
    tau.vec = rep(tau,d)
    e = (1-2*tau.vec)/(tau.vec*(1-tau.vec)) # constant
    delta = sqrt(2/(tau.vec*(1-tau.vec))) # constant
    Beta = parm[1:p]
    ds = parm[(p+1):(d+p)]
    if(any(ds < 0)){return(NA)}
    thetas = parm[-c(1:(p+d))]
    if(any(thetas < 0) || any(thetas > pi)){return(NA)}
    Rho = Theta2Corr(d,thetas)
    Yc = Y - matrix(c(X%*%Beta),d,N,byrow = FALSE)
    
    mu = ds*e
    Mu = matrix(mu,d,N,byrow = FALSE)
    # Sigma = diag(ds*delta) %*% Rho %*% diag(ds*delta), built element-wise
    Sigma.tilde = t(Rho*delta)*delta
    Sigma = as.symmetric.matrix(t(Sigma.tilde*ds)*ds)
    if(!is.positive.definite(Sigma)){
      Sigma = as.symmetric.matrix(make.positive.definite(Sigma))
    }
    Omega = as.inverse(Sigma)
    # per-cluster quadratic and bilinear forms
    x.Omega.x = colSums(Yc * crossprod(Omega,Yc))
    mu.Omega.mu = colSums(Mu * crossprod(Omega,Mu))
    x.Omega.mu = colSums(Yc * crossprod(Omega,Mu))
    
    if(epsi==0){
      Const = - log(2 * pi) * (d/2) + log(2)
    }else{
      Const = 0.5*log(2) - log(sqrt(epsi)) - log(besselK(sqrt(2*epsi),1)) - log(2 * pi) * (d/2)
    }    
    half.logdet = logdet(Sigma) * 0.5
    power.term = (log(x.Omega.x+epsi) - (log(2 + mu.Omega.mu))) *(v/2)
    bessel.term = log(besselK(sqrt((2 + mu.Omega.mu) * (x.Omega.x+epsi)),v))
    sum(Const + x.Omega.mu - half.logdet + power.term + bessel.term)
  }
  
  loglike.MAL.Beta = function(parm,Y,X,tau,epsi=0,ds,rho){
    # d = 2 log-likelihood as a function of the regression coefficients only;
    # the scales and the correlation are held fixed.
    # parm: regression coefficients Beta
    # Y: d x N matrix of outcomes (one column per cluster) or a length-d vector
    # X: stacked design matrix; X %*% Beta is reshaped column-wise to d x N
    # tau: scalar recycled over the d components
    # epsi: constant added to every quadratic form m_i; a non-zero value also
    #       changes the normalising constant
    # ds: fixed scale parameters (length d)
    # rho: fixed correlation
    # Returns the log-likelihood summed over clusters (ds and rho are not
    # range-checked here).
    if(is.matrix(Y)){
      d = nrow(Y)    
      N = ncol(Y)
    }else{
      d = length(Y)
      N = 1
    }
    
    v = (2 - d)/2  # order of the Bessel function
    tau.vec = rep(tau,d)
    e = (1-2*tau.vec)/(tau.vec*(1-tau.vec)) # constant
    delta = sqrt(2/(tau.vec*(1-tau.vec))) # constant
    Beta = parm
    Psi = invvech(c(1,rho,1))
    
    mu = e*ds
    Mu = matrix(mu,d,N)
    Loc = matrix(c(X%*%Beta),d,N)
    Sigma.tilde = t(Psi*delta)*delta
    Sigma = as.symmetric.matrix(t(Sigma.tilde*ds)*ds)
    if(!is.positive.definite(Sigma)){
      Sigma = as.symmetric.matrix(make.positive.definite(Sigma))
    }
    # only the inverse of Sigma is used below; the unused inverses of Psi
    # and Sigma.tilde are no longer computed
    Omega = as.inverse(Sigma)
    
    Yc = Y - Loc
    
    # per-cluster quadratic form Yc_i' Omega Yc_i
    m_i = colSums(Yc * crossprod(Omega,Yc))
    
    dd = 2+sum(mu * crossprod(Omega,mu))
    if(epsi==0){
      Const = log(2) - log(2 * pi) * (d/2)
    }else{
      Const = 0.5*log(2) - log(sqrt(epsi)) - log(besselK(sqrt(2*epsi),1)) - log(2 * pi) * (d/2)
    }
    T1 = -N*logdet(Sigma) * 0.5
    T2 = sum(Yc * crossprod(Omega,Mu))
    # with v = 0 the term is skipped so that log(0) cannot produce 0 * -Inf
    if(v==0){
      T3 = 0
    }else{
      T3 = (v/2)*sum(log(m_i + epsi))    
    }
    T4 = -N*v*0.5*log(dd)
    T6 = sqrt(dd*(m_i+epsi))
    T5 = sum(log(besselK(T6,v)))
    N*Const+T1 + T2 + T3 + T4 + T5
  }
  loglike.MAL.OtherParm = function(parm,Y,X,tau,epsi=0,Beta){
    # d = 2 log-likelihood as a function of the scales and the correlation;
    # the regression coefficients are held fixed.
    # parm: c(ds[1:d], rho)
    # Y: d x N matrix of outcomes (one column per cluster) or a length-d vector
    # X: stacked design matrix; X %*% Beta is reshaped column-wise to d x N
    # tau: scalar recycled over the d components
    # epsi: constant added to every quadratic form m_i; a non-zero value also
    #       changes the normalising constant
    # Beta: fixed regression coefficients
    # Returns the log-likelihood summed over clusters, or NA when a scale is
    # negative or rho lies outside [-1,1].
    if(is.matrix(Y)){
      d = nrow(Y)    
      N = ncol(Y)
    }else{
      d = length(Y)
      N = 1
    }
    
    v = (2 - d)/2  # order of the Bessel function
    tau.vec = rep(tau,d)
    e = (1-2*tau.vec)/(tau.vec*(1-tau.vec)) # constant
    delta = sqrt(2/(tau.vec*(1-tau.vec))) # constant
    ds = parm[1:d]
    
    if(any(ds < 0)){return(NA)}
    rho = parm[d+1]
    if(rho < -1 || rho > 1){return(NA)}
    Psi = invvech(c(1,rho,1))
    
    mu = e*ds
    Mu = matrix(mu,d,N)
    Loc = matrix(c(X%*%Beta),d,N)
    Sigma.tilde = t(Psi*delta)*delta
    Sigma = as.symmetric.matrix(t(Sigma.tilde*ds)*ds)
    if(!is.positive.definite(Sigma)){
      Sigma = as.symmetric.matrix(make.positive.definite(Sigma))
    }
    # only the inverse of Sigma is used below; the unused inverses of Psi
    # and Sigma.tilde are no longer computed (solve(Psi) failed at rho = -1 or 1)
    Omega = as.inverse(Sigma)
    
    Yc = Y - Loc
    
    # per-cluster quadratic form Yc_i' Omega Yc_i
    m_i = colSums(Yc * crossprod(Omega,Yc))
    
    dd = 2+sum(mu * crossprod(Omega,mu))
    if(epsi==0){
      Const = log(2) - log(2 * pi) * (d/2)
    }else{
      Const = 0.5*log(2) - log(sqrt(epsi)) - log(besselK(sqrt(2*epsi),1)) - log(2 * pi) * (d/2)
    }
    T1 = -N*logdet(Sigma) * 0.5
    T2 = sum(Yc * crossprod(Omega,Mu))
    # with v = 0 the term is skipped so that log(0) cannot produce 0 * -Inf
    if(v==0){
      T3 = 0
    }else{
      T3 = (v/2)*sum(log(m_i + epsi))    
    }
    T4 = -N*v*0.5*log(dd)
    T6 = sqrt(dd*(m_i+epsi))
    T5 = sum(log(besselK(T6,v)))
    N*Const+T1 + T2 + T3 + T4 + T5
  }
  
  loglike.MAL.2.Beta = function(parm,Y,X,tau,epsi=0,ds,Rho){
    # General-d log-likelihood as a function of the regression coefficients
    # only; the scales and the correlation matrix are held fixed.
    # parm: regression coefficients Beta
    # Y: d x N matrix of outcomes (one column per cluster) or a length-d vector
    # X: stacked design matrix; X %*% Beta is reshaped column-wise to d x N
    # tau: scalar recycled over the d components
    # epsi: constant added to every quadratic form; a non-zero value also
    #       changes the normalising constant
    # ds: fixed scale parameters (length d)
    # Rho: fixed d x d correlation matrix
    # Returns the log-likelihood summed over clusters.
    if(is.matrix(Y)){
      d = nrow(Y)    
      N = ncol(Y)
    }else{
      d = length(Y)
      N = 1
    }
    
    v = (2 - d)/2  # order of the Bessel function
    tau.vec = rep(tau,d)
    e = (1-2*tau.vec)/(tau.vec*(1-tau.vec)) # constant
    delta = sqrt(2/(tau.vec*(1-tau.vec))) # constant
    Beta = parm
    Yc = Y - matrix(c(X%*%Beta),d,N,byrow = FALSE)
    
    mu = ds*e
    Mu = matrix(mu,d,N,byrow = FALSE)
    # Sigma = diag(ds*delta) %*% Rho %*% diag(ds*delta), built element-wise
    Sigma.tilde = t(Rho*delta)*delta
    Sigma = as.symmetric.matrix(t(Sigma.tilde*ds)*ds)
    if(!is.positive.definite(Sigma)){
      Sigma = as.symmetric.matrix(make.positive.definite(Sigma))
    }
    Omega = as.inverse(Sigma)
    # per-cluster quadratic and bilinear forms
    x.Omega.x = colSums(Yc * crossprod(Omega,Yc))
    mu.Omega.mu = colSums(Mu * crossprod(Omega,Mu))
    x.Omega.mu = colSums(Yc * crossprod(Omega,Mu))
    
    if(epsi==0){
      Const = - log(2 * pi) * (d/2) + log(2)
    }else{
      Const = 0.5*log(2) - log(sqrt(epsi)) - log(besselK(sqrt(2*epsi),1)) - log(2 * pi) * (d/2)
    }    
    half.logdet = logdet(Sigma) * 0.5
    power.term = (log(x.Omega.x+epsi) - (log(2 + mu.Omega.mu))) *(v/2)
    bessel.term = log(besselK(sqrt((2 + mu.Omega.mu) * (x.Omega.x+epsi)),v))
    sum(Const + x.Omega.mu - half.logdet + power.term + bessel.term)
  }
  loglike.MAL.2.OtherParm = function(parm,Y,X,tau,epsi=0,Beta){
    # General-d log-likelihood as a function of the scales and the angles of
    # the correlation matrix; the regression coefficients are held fixed.
    # parm: c(ds[1:d], thetas)
    # Y: d x N matrix of outcomes (one column per cluster) or a length-d vector
    # X: stacked design matrix; X %*% Beta is reshaped column-wise to d x N
    # tau: scalar recycled over the d components
    # epsi: constant added to every quadratic form; a non-zero value also
    #       changes the normalising constant
    # Beta: fixed regression coefficients
    # Returns the log-likelihood summed over clusters, or NA when a scale is
    # negative or an angle lies outside [0,pi].
    if(is.matrix(Y)){
      d = nrow(Y)    
      N = ncol(Y)
    }else{
      d = length(Y)
      N = 1
    }
    
    v = (2 - d)/2  # order of the Bessel function
    tau.vec = rep(tau,d)
    e = (1-2*tau.vec)/(tau.vec*(1-tau.vec)) # constant
    delta = sqrt(2/(tau.vec*(1-tau.vec))) # constant
    ds = parm[1:d]
    if(any(ds < 0)){return(NA)}
    thetas = parm[-c(1:d)]
    if(any(thetas < 0) || any(thetas > pi)){return(NA)}
    Rho = Theta2Corr(d,thetas)
    Yc = Y - matrix(c(X%*%Beta),d,N,byrow = FALSE)
    
    mu = ds*e
    Mu = matrix(mu,d,N,byrow = FALSE)
    # Sigma = diag(ds*delta) %*% Rho %*% diag(ds*delta), built element-wise
    Sigma.tilde = t(Rho*delta)*delta
    Sigma = as.symmetric.matrix(t(Sigma.tilde*ds)*ds)
    if(!is.positive.definite(Sigma)){
      Sigma = as.symmetric.matrix(make.positive.definite(Sigma))
    }
    Omega = as.inverse(Sigma)
    # per-cluster quadratic and bilinear forms
    x.Omega.x = colSums(Yc * crossprod(Omega,Yc))
    mu.Omega.mu = colSums(Mu * crossprod(Omega,Mu))
    x.Omega.mu = colSums(Yc * crossprod(Omega,Mu))
    
    if(epsi==0){
      Const = - log(2 * pi) * (d/2) + log(2)
    }else{
      Const = 0.5*log(2) - log(sqrt(epsi)) - log(besselK(sqrt(2*epsi),1)) - log(2 * pi) * (d/2)
    }    
    half.logdet = logdet(Sigma) * 0.5
    power.term = (log(x.Omega.x+epsi) - (log(2 + mu.Omega.mu))) *(v/2)
    bessel.term = log(besselK(sqrt((2 + mu.Omega.mu) * (x.Omega.x+epsi)),v))
    sum(Const + x.Omega.mu - half.logdet + power.term + bessel.term)
  }
  
  # Functions to compute the gradient and the Hessian matrix
  Grad.central = function(parm,Y,X,tau,epsi=0){
    # Central-difference gradient of the full log-likelihood with step 1e-4.
    # parm: full parameter vector at which the gradient is evaluated
    # Y: d x N matrix of outcomes or a length-d vector (single cluster)
    # X, tau, epsi: passed on to the log-likelihood
    # loglike.MAL.2 is used when d > 2, loglike.MAL otherwise.
    # Returns one partial derivative per element of parm.
    h = 1e-4
    d = if(is.matrix(Y)) nrow(Y) else length(Y)
    ll.fun = if(d>2) loglike.MAL.2 else loglike.MAL
    
    partial = function(j){
      up = down = parm
      up[j] = parm[j] + h
      down[j] = parm[j] - h
      ll.up = ll.fun(up,Y,X,tau,epsi=epsi)
      ll.down = ll.fun(down,Y,X,tau,epsi=epsi)
      (ll.up-ll.down)/(2*h)
    }
    sapply(1:length(parm),partial)
  }
  Hessian.fun = function(parm,Y,X,tau,epsi=0){
    # Finite-difference Hessian of the full log-likelihood with step 1e-4.
    # parm: full parameter vector at which the Hessian is evaluated
    # Y: d x N matrix of outcomes or a length-d vector (single cluster)
    # X, tau, epsi: passed on to the log-likelihood
    # loglike.MAL.2 is used when d > 2, loglike.MAL otherwise.
    # Returns the symmetric length(parm) x length(parm) matrix of second
    # differences.
    k = 1e-4
    # accept a single cluster given as a vector, as Grad.central does
    d = if(is.matrix(Y)) nrow(Y) else length(Y)
    ll.fun = if(d > 2) loglike.MAL.2 else loglike.MAL
    ll.at = function(theta){
      ll.fun(theta,Y,X,tau,epsi=epsi)
    }
    ll = ll.at(parm)
    
    # index pairs (i,j) with i <= j; in this order the values fill the
    # half-vectorised matrix expected by invvech
    evals = combinations(length(parm),2,repeats.allowed = TRUE)
    
    Hessian.vals = mapply(function(x){
      idx = evals[x,]
      if(idx[1]==idx[2]){
        # diagonal entry: second central difference in one coordinate
        i = idx[1]
        parm.pos = parm.neg = parm
        parm.pos[i] = parm[i] + k
        parm.neg[i] = parm[i] - k
        fit = (ll.at(parm.pos) + ll.at(parm.neg) - 2*ll)/k^2
      }else{
        # off-diagonal entry: four-point mixed difference; signs gives the
        # direction of the step in each of the two coordinates
        shifted = function(signs){
          out = parm
          out[idx] = parm[idx] + signs*k
          out
        }
        ll.pos.pos = ll.at(shifted(c(1,1)))
        ll.pos.neg = ll.at(shifted(c(1,-1)))
        ll.neg.pos = ll.at(shifted(c(-1,1)))
        ll.neg.neg = ll.at(shifted(c(-1,-1)))
        fit = (ll.pos.pos - ll.pos.neg - ll.neg.pos + ll.neg.neg)/(4*k^2)
      }
      return(fit)
    },x=1:nrow(evals))
    invvech(Hessian.vals)
  }
  
  Grad.central.OtherParm = function(parm,Y,X,tau,epsi=0,Beta){
    # Central-difference gradient (step 1e-4) of the log-likelihood with
    # respect to the non-regression parameters, Beta being held fixed.
    # parm: c(ds, rho) when d <= 2, c(ds, thetas) when d > 2
    # Y: d x N matrix of outcomes or a length-d vector (single cluster)
    # X, tau, epsi, Beta: passed on to the log-likelihood
    # Returns one partial derivative per element of parm.
    h = 1e-4
    d = if(is.matrix(Y)) nrow(Y) else length(Y)
    ll.fun = if(d>2) loglike.MAL.2.OtherParm else loglike.MAL.OtherParm
    
    partial = function(j){
      up = down = parm
      up[j] = parm[j] + h
      down[j] = parm[j] - h
      ll.up = ll.fun(up,Y,X,tau,epsi=epsi,Beta=Beta)
      ll.down = ll.fun(down,Y,X,tau,epsi=epsi,Beta=Beta)
      (ll.up-ll.down)/(2*h)
    }
    sapply(1:length(parm),partial)
  }
  
  # ---- input checks: problems are reported by returning a message string ----
  if(!is.vector(y)){return('y is not a vector')}
  if(!is.matrix(X)){return('X is not a matrix')}
  # tau = 0 and tau = 1 are rejected as well: they divide by zero in e and delta
  if(tau >= 1 || tau <= 0){return('tau is not between 0 and 1')}
  
  d = unique(table(ID))
  # Y is filled column by column below, which needs one common cluster size
  if(length(d) != 1){return('clusters do not all have the same size')}
  N = length(y)/d
  p = ncol(X)
  Y = matrix(y,d,N,byrow = FALSE)
  
  # ---- initial values (moment-based) when none are supplied ----
  if(is.null(parm.ini)){
    tau.vec = rep(tau,d)
    e = (1-2*tau.vec)/(tau.vec*(1-tau.vec)) # constant
    delta = sqrt(2/(tau.vec*(1-tau.vec))) # constant
    
    Var.Y = var(t(Y))    
    ds.ini = sqrt(diag(Var.Y)/( e^2 + delta^2 ))
    # least squares on the outcomes shifted by ds.ini*e
    y.temp = y - rep(ds.ini*e,N)
    Beta.ini = coef(lm(y.temp~X-1))
    DD = 1/(ds.ini*delta)
    Inner = Var.Y - t(tcrossprod(e)*ds.ini)*ds.ini
    rho.ini = as.symmetric.matrix(t(Inner*DD)*DD)
    if(!is.positive.definite(rho.ini)){
      rho.ini = GenCor.Scaling(rho.ini)
    }
    if(d==2){
      rho.ini = rho.ini[upper.tri(rho.ini)]
      parm.ini = c(Beta.ini,ds.ini,rho.ini)    
      # fall back to zero correlation when the start value is unusable
      if(!is.finite(loglike.MAL(parm.ini,Y,X,tau,epsi=epsi))){
        parm.ini = c(Beta.ini,ds.ini,rep(0,length(rho.ini)))
      }
    }else{
      theta.ini = Corr2Theta(rho.ini)
      parm.ini = c(Beta.ini,ds.ini,theta.ini)    
      # fall back to angles of pi/2 (identity correlation) when unusable
      if(!is.finite(sum(loglike.MAL.2(parm.ini,Y,X,tau,epsi=epsi)))){
        parm.ini = c(Beta.ini,ds.ini,rep(pi/2,length(theta.ini)))
      }
    }
  }
  
  Beta.names = paste('B',0:(p-1),sep = '')
  ds.names = paste('d',1:d,sep = '')
  Control.names = c('Convergence','Iterations','logLikelihood')
  
  if(epsi>0){
    # ---- direct maximization of the log-likelihood ----
    ll.fun = if(d==2) loglike.MAL else loglike.MAL.2
    Fit = tryCatch(optim(parm.ini,ll.fun,gr=Grad.central,method='BFGS',control=list(maxit=5000,fnscale=-1),Y=Y,X=X,
                         tau=tau,epsi=epsi),error=function(e){e})
    
    # tryCatch hands back the condition object when optim fails; report it
    # and return a result of the usual shape filled with NA
    if(inherits(Fit,'error')){
      warning('optim failed in mle.MAL: ',conditionMessage(Fit),call. = FALSE)
      n.parm = length(parm.ini)
      Beta.H = rep(NA_real_,p)
      names(Beta.H) = Beta.names
      ds.H = rep(NA_real_,d)
      names(ds.H) = ds.names
      Control = rep(NA_real_,3)
      names(Control) = Control.names
      return(list(Beta=Beta.H,ds = ds.H,Psi = matrix(NA,d,d),V=matrix(NA,n.parm,n.parm),Control=Control,
                  Deriv = list(Gradient = rep(NA,n.parm),Hessian = matrix(NA,n.parm,n.parm))))
    }
    # as before, for d > 2 the raw optim result is returned when the
    # convergence code exceeds 2
    if(d!=2 && Fit$convergence > 2){
      return(Fit)
    }
    
    Est = Fit$par
    
    Beta.H = Est[1:p]
    names(Beta.H) = Beta.names
    ds.H = Est[c(p+1):(p+d)]
    names(ds.H) = ds.names
    if(d==2){
      Psi.H = diag(d)
      Psi.H[upper.tri(Psi.H)] = Est[-c(1:(p+d))]
      Psi.H = as.matrix(forceSymmetric(Psi.H))
    }else{
      Psi.H = Theta2Corr(d,Est[-c(1:(p+d))])
    }
    
    # A.inv is the numerical Hessian of the log-likelihood at the estimate
    A.inv = Hessian.fun(Est,Y=Y,X=X,tau=tau,epsi=epsi)
    grad.final = Grad.central(Est,Y=Y,X=X,tau=tau,epsi=epsi)
    PD.A = tryCatch(is.positive.definite(-A.inv),error=function(e){FALSE})
    # Computing the covariance matrix of the estimates (sandwich form A B A)
    if(isTRUE(PD.A)){
      A = solve(A.inv)
      # column i of Y holds the i-th cluster in order of appearance, so the
      # matching rows of X are selected through the i-th distinct ID
      ids = unique(ID)
      scores = vapply(seq_len(N),function(i){
        Grad.central(Est,Y[,i],X[ID==ids[i],,drop=FALSE],tau,epsi=epsi)
      },numeric(length(Est)))
      # sum over clusters of the outer products of the per-cluster gradients
      B = tcrossprod(scores)
      V = A%*%B%*%A
    }else{
      V = matrix(NA,nrow(A.inv),ncol(A.inv))
    }
    
    Control = c(Fit$convergence,Fit$counts[1],Fit$value)
    names(Control) = Control.names
    
    Estimates = list(Beta=Beta.H,ds = ds.H,Psi = Psi.H,V=V,Control=Control,
                     Deriv = list(Gradient = grad.final,Hessian = A.inv))
  }else{
    # ---- maximization of the log-likelihood in two alternating steps (epsi=0) ----
    step = 0
    tol = 200
    lls = 0
    parmH = parm.ini
    parm.test = parm.ini
    beta.idx = 1:p
    ds.idx = (p+1):(p+d)
    ll.other = if(d==2) loglike.MAL.OtherParm else loglike.MAL.2.OtherParm
    
    while(tol > 1e-8 && step < 50){
      # step 1: scales and correlation parameters given the current Beta
      fit1 = optim(parmH[-beta.idx],ll.other,gr=Grad.central.OtherParm,method='BFGS',control=list(maxit=5000,fnscale=-1),Y=Y,X=X,
                   tau=tau,epsi=0,Beta=parmH[beta.idx])
      parm.test[-beta.idx] = fit1$par
      
      # step 2: Beta given the updated scales and correlation
      if(d==2){
        fit2 = optim(parm.test[beta.idx],loglike.MAL.Beta,gr=NULL,method='Nelder-Mead',control=list(maxit=5000,fnscale=-1),Y=Y,X=X,
                     tau=tau,epsi=0,ds=parm.test[ds.idx],rho=parm.test[p+d+1])
      }else{
        Rho = Theta2Corr(d,parm.test[(p+d+1):(length(parm.test))])
        fit2 = optim(parm.test[beta.idx],loglike.MAL.2.Beta,gr=NULL,method='Nelder-Mead',control=list(maxit=5000,fnscale=-1),Y=Y,X=X,
                     tau=tau,epsi=0,ds=parm.test[ds.idx],Rho=Rho)
      }
      parm.test[beta.idx] = fit2$par
      
      parm.diff = parm.test - parmH
      eval.diff = abs(fit2$value - lls)
      # NOTE(review): the stopping rule takes the min of the two criteria for
      # d = 2 but the max for d > 2; kept as found, confirm which is intended
      if(d==2){
        tol = min(crossprod(parm.diff),eval.diff)
      }else{
        tol = max(crossprod(parm.diff),eval.diff)
      }
      
      parmH = parm.test
      lls = fit2$value
      step = step + 1
    }
    
    Beta.H = parmH[1:p]
    names(Beta.H) = Beta.names
    ds.H = parmH[c(p+1):(p+d)]
    names(ds.H) = ds.names
    if(d==2){
      Psi.H = diag(d)
      Psi.H[upper.tri(Psi.H)] = parmH[-c(1:(p+d))]
      Psi.H = as.matrix(forceSymmetric(Psi.H))
    }else{
      Psi.H = Theta2Corr(d,parmH[-c(1:(p+d))])
    }
    # no covariance matrix, gradient or Hessian is computed in this branch
    V = matrix(NA,length(parmH),length(parmH))
    # 'Iterations' is the number of alternating cycles performed
    Control = c(fit2$convergence,step,fit2$value)
    names(Control) = Control.names
    Gradient = rep(NA,length(parmH))
    Hessian = matrix(NA,length(parmH),length(parmH))
    Estimates = list(Beta=Beta.H,ds = ds.H,Psi = Psi.H,V=V,Control=Control,
                     Deriv = list(Gradient = Gradient,Hessian = Hessian))
  }
  # the output consists of a list with [1] estimates of Beta, [2] estimates of the diagonal elements of the Delta matrix, 
  # [3] estimate of the correlation matrix, [4] covariance matrix of the estimates, [5] Control: convergence, iterations and log-likelihood, 
  # [6] Gradient and Hessian
  return(Estimates)
}


## setting the parameters for the simulations
Beta = matrix(c(4,2,1,5,-3,1),nrow=2,byrow = TRUE)
Vars = c(1,4,1,4)
Corr.t = invvech(c(1,0.5,1))
Corr.q = invvech(c(1,0.5,1))
Sigma = cor2cov(kronecker(Corr.t,Corr.q),sqrt(Vars))  
# Generating the simulated data
Data = genData(N=200,Beta,Sigma,dist='NORM',Seed=123)
# number of regression coefficients; used below to pick the Beta entries
p = length(Beta)
y = Data$Y
X = as.matrix(Data[,-c(1:2)])
ID = Data$ID

### MLE using different values for epsilon
# one fit per value of epsilon, at tau = 0.25, 0.5 and 0.9
epsi = c(0,0.001,0.01,0.1,0.2,0.5)
mle.25 = pbmapply(function(eps){
  mle.MAL(y,X,ID,0.25,NULL,epsi=eps)
},eps=epsi,SIMPLIFY = FALSE)
mle.50 = pbmapply(function(eps){
  mle.MAL(y,X,ID,0.5,NULL,epsi=eps)
},eps=epsi,SIMPLIFY = FALSE)
mle.90 = pbmapply(function(eps){
  mle.MAL(y,X,ID,0.9,NULL,epsi=eps)
},eps=epsi,SIMPLIFY = FALSE)

### Results of Table F.11
# Each matrix below has one row per regression coefficient and one column
# per value of epsilon.

# Estimates with different values of epsilon
EST.25 = vapply(mle.25,function(fit){fit$Beta},numeric(p))
EST.50 = vapply(mle.50,function(fit){fit$Beta},numeric(p))
EST.90 = vapply(mle.90,function(fit){fit$Beta},numeric(p))


# Standard error with different values of epsilon
# (NA for the fits that carry no covariance matrix, e.g. epsilon = 0)
SE.25 = vapply(mle.25,function(fit){sqrt(diag(fit$V))[seq_len(p)]},numeric(p))
SE.50 = vapply(mle.50,function(fit){sqrt(diag(fit$V))[seq_len(p)]},numeric(p))
SE.90 = vapply(mle.90,function(fit){sqrt(diag(fit$V))[seq_len(p)]},numeric(p))

# Gradient with different values of epsilon
GRAD.25 = vapply(mle.25,function(fit){fit$Deriv$Gradient[seq_len(p)]},numeric(p))
GRAD.50 = vapply(mle.50,function(fit){fit$Deriv$Gradient[seq_len(p)]},numeric(p))
GRAD.90 = vapply(mle.90,function(fit){fit$Deriv$Gradient[seq_len(p)]},numeric(p))


## results of Table F.11
EST = rbind(EST.25,EST.50,EST.90)
SE = rbind(SE.25,SE.50,SE.90)
GRAD = rbind(GRAD.25,GRAD.50,GRAD.90)

